
####################################################
####          Replication Code for              ####
####     Spatial Interdependence                ####
####     and Instrumental Variable Models       ####
####################################################
####                                            ####
####                9/ 06/2018                  ####
####  this file runs rcode to create            ####
####            Figure 2 & 3 in MS              ####
####################################################


#### load simulation output
load("simulation_output_RR.rda")
summary(dat)



### some housekeeping: we change variable names
dat$Method = revalue(dat$Method, c("2sls-robust" = "2slsRobust"))
unique(dat$Method)

### We subset the output to drop 2sls with robust standard errors as these are not analyzed
### No subtantial improvement with robust SE
dat = dat[dat$Method %in% c("ols","2sls","SAR-iv"),]
### Next we subset the output to include only the simulations without misspecification of W
dat = dat[dat$misspec == 0, ]

### calculate coverage statistic, absolut and squared error
dat$coverage = 0
dat$coverage = ifelse(dat$treat <= (dat$Coef + (qnorm(0.975)*dat$SE)) & dat$treat >= (dat$Coef - (qnorm(0.975)*dat$SE)), 1, dat$coverage)
dat$abs.error = abs(dat$treat - dat$Coef)
dat$sqr.error = (dat$treat - dat$Coef)^2



### Calculate scenarios of the simulation when 2SLS is best
summary = ddply(dat, .(Method, rho.y, rho.z, cor, iv.coef, N), summarize, meanAbs = mean(abs.error), medianAbs = median(abs.error), coverage = mean(coverage),  mse = mean(sqr.error))
### median absolute error by method
medianAbsoluteError = spread(summary, Method, medianAbs)
### rename for R to be acceptable variable name
names(medianAbsoluteError)[which(names(medianAbsoluteError) == "2sls")] <- "sls"
### get rid of NAs to be able to compare methods
medianAbsoluteError = ddply(medianAbsoluteError, .(cor, rho.y, rho.z, iv.coef,N), summarize, sls = sum(sls, na.rm =TRUE), ols =  sum(ols, na.rm =TRUE), SARiv =  sum(`SAR-iv`, na.rm =TRUE))

### create indicator whether 2sls is best
medianAbsoluteError$sls_better = 0
medianAbsoluteError$sls_better = ifelse(medianAbsoluteError$SARiv >  medianAbsoluteError$sls , 1, medianAbsoluteError$sls_better)
summary(medianAbsoluteError)
### see scenarios when 2sls is better
medianAbsoluteError[medianAbsoluteError$sls_better ==1,]
### maximum difference between 2sls and spatial 2sls when 2sls is better (mentioned in section 5
max(abs(medianAbsoluteError$sls[medianAbsoluteError$sls_better ==1] - medianAbsoluteError$SARiv[medianAbsoluteError$sls_better ==1]))




### Next, create plots for manuscript analyzing the simulations

### First, subset data to scenario analyzed in manuscript
### Strong IV, N = 200,
data_iv_strong = subset(dat, iv.coef == 1.5 & N == 200)
#### Calculate summary stats for Figure
data_plot = ddply(data_iv_strong, .(Method, rho.y, rho.z, cor ), summarize, meanAbs = mean(abs.error), medianAbs = median(abs.error), coverage = mean(coverage),  mse = mean(sqr.error))



### Rename spatial 2sls for the plot
### Create Variable that will serve as Label for the Row
data_plot$Method = revalue(data_plot$Method, c("SAR-iv" = "s-2sls"))
data_plot$nonspat = case_when(data_plot$cor == -0.5 ~ "non-spatial \n endogeneity = -0.5", data_plot$cor == 0 ~ "non-spatial \n endogeneity = 0", data_plot$cor == 0.5 ~"non-spatial \n endogeneity = 0.5")

### Make plot for Median Absolute Error
p = ggplot(data = data_plot, aes(y = medianAbs, x = rho.y))
p = p + geom_point(aes(colour = Method, shape = Method), size = 4.5, alpha = 1, position=position_dodge(width = 0.1))
p = p + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p = p + labs(title = "", y="Median Absolute Error", x = expression(rho[y]))+ theme(axis.text.x=element_text(size=13, face = "bold")) + theme(axis.text.y=element_text(size=15, face = "bold")) + theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20))
p = p + scale_colour_brewer(palette = "Set1",name = "Estimation Method") + scale_shape_manual(name = "Estimation Method", values = c(15, 17, 16))
p = p + theme(legend.position = "none",legend.key = element_blank(), legend.background = element_rect(colour = "white"), legend.text = element_text(size = 15), legend.title = element_text(size = 15), panel.border = element_rect(colour = "black", fill=NA, size=2.5))
p = p + facet_grid(nonspat~ rho.z, labeller = label_bquote(cols = rho[z] ~ "=" ~ .(rho.z))) +  theme(strip.background = element_blank(), strip.placement = "outside") + theme(strip.text.x = element_text(size=14, face = "bold"), strip.text.y = element_text(size=12, face="bold", angle = 45)) + scale_x_continuous(limits = c(-0.1,0.7), breaks = c(0,0.3,0.6),labels = c(0,0.3,0.6))
p = p + theme(legend.position = "bottom", legend.direction = "horizontal",legend.key = element_blank(), legend.background = element_rect(colour = "white"), legend.text = element_text(size = 24), legend.title = element_text(size = 24), panel.border = element_rect(colour = "black", fill=NA, size=3.5))
p = p + theme(panel.spacing = unit(0.95, "lines"))
plot(p)
### save plot
ggsave("Figure2.pdf", width = 6 * 1.618, height = 6)


### Creat plot for coverage
p = ggplot(data = data_plot, aes(y = coverage, x = rho.y))
p = p + geom_point(aes(colour = Method, shape = Method), size = 4.5, alpha = 1, position=position_dodge(width = 0.1))
p = p + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p = p + labs(title = "", y="Coverage", x = expression(rho[y]))+ theme(axis.text.x=element_text(size=13, face = "bold")) + theme(axis.text.y=element_text(size=15, face = "bold")) + theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20))
p = p + scale_colour_brewer(palette = "Set1",name = "Estimation Method") + scale_shape_manual(name = "Estimation Method", values = c(15, 17, 16))
p = p + theme(legend.position = "none",legend.key = element_blank(), legend.background = element_rect(colour = "white"), legend.text = element_text(size = 15), legend.title = element_text(size = 15), panel.border = element_rect(colour = "black", fill=NA, size=2.5))
p = p + facet_grid(nonspat~ rho.z, labeller = label_bquote(cols = rho[z] ~ "=" ~ .(rho.z))) +  theme(strip.background = element_blank(), strip.placement = "outside") + theme(strip.text.x = element_text(size=14, face = "bold"), strip.text.y = element_text(size=12, face="bold", angle = 45)) + scale_x_continuous(limits = c(-0.1,0.7), breaks = c(0,0.3,0.6),labels = c(0,0.3,0.6))
p = p + theme(legend.position = "bottom", legend.direction = "horizontal",legend.key = element_blank(), legend.background = element_rect(colour = "white"), legend.text = element_text(size = 24), legend.title = element_text(size = 24), panel.border = element_rect(colour = "black", fill=NA, size=3.5)) +  scale_y_continuous(limits = c(0,1.25), breaks = c(0,0.5,0.95))
p = p + theme(panel.spacing = unit(0.95, "lines")) + geom_hline(aes(yintercept = 0.95),col = "red")
plot(p)
ggsave("Figure3.pdf", width = 6 * 1.618, height = 6)

